package org.biogroovy.io.eutils;

import groovy.util.logging.Slf4j

import javax.xml.namespace.QName
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.xpath.XPath
import javax.xml.xpath.XPathConstants
import javax.xml.xpath.XPathFactory

import org.biogroovy.conf.BioGroovyConfig
import org.biogroovy.eutils.EUtilsURLFactory
import org.biogroovy.io.AbsSeqReader
import org.biogroovy.io.IFetcher;
import org.biogroovy.models.*
import org.w3c.dom.Node
import org.w3c.dom.NodeList

/**
 * This class reads EntrezGene records and generates a Gene object.
 */
@Slf4j
class EntrezGeneReader extends AbsSeqReader<Gene>{

	/**
	 * XPath expressions used to extract fields from an EntrezGene XML document, keyed by a short name.
	 * The whole map is handed to parseData; in addition the 'references', 'articles', 'phenotype'
	 * and 'goList' entries are evaluated directly by parseDbReferences, parseArticles,
	 * parsePhenotype and parseGo respectively.
	 * Note that every expression is absolute (starts with '/' or '//'), so it is resolved
	 * from the root of the document that contains the context node, not from the context node itself.
	 */
	static final Map<String, String> XPATH_MAP = [
		entrezGeneId:'/Entrezgene-Set/Entrezgene/Entrezgene_track-info/Gene-track/Gene-track_geneid',
		description:'/Entrezgene-Set/Entrezgene/Entrezgene_summary',
		name:'/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_desc',
		symbol:'/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_locus',
		species:'/Entrezgene-Set/Entrezgene/Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_taxname',
		synonyms:'/Entrezgene-Set/Entrezgene/Entrezgene_gene/Gene-ref/Gene-ref_syn/Gene-ref_syn_E',
		references:'//Dbtag[Dbtag_db="HGNC"] | //Dbtag[Dbtag_db="HPRD"] | //Dbtag[Dbtag_db="MIM"] | //Dbtag[Dbtag_db="Ensembl"]',
		articles:'//Gene-commentary[Gene-commentary_type/@value="generif"]',
		phenotype:'//Gene-commentary[Gene-commentary_type/@value="phenotype"]',
		goList:'/Entrezgene-Set/Entrezgene/Entrezgene_properties/Gene-commentary[Gene-commentary_heading="GeneOntology"]/Gene-commentary_comment/Gene-commentary'
	];

	/**
	 * XPath result types (XPathConstants) for fields named in XPATH_MAP; passed to parseData
	 * together with XPATH_MAP. Only entrezGeneId, description, name, symbol, species and
	 * synonyms have an entry here; the remaining XPATH_MAP keys are evaluated by the
	 * dedicated parse* methods of this class, which choose their own result types.
	 */
	static final Map<String, QName> NODE_TYPE_MAP = [
		entrezGeneId:XPathConstants.NUMBER,
		description:XPathConstants.STRING,
		name:XPathConstants.STRING,
		symbol:XPathConstants.STRING,
		species:XPathConstants.STRING,
		synonyms:XPathConstants.NODESET,

	]

	/** XPath expression used by readList to select every Entrezgene record in a document. */
	static final String ROOT_PATH = "//Entrezgene-Set/Entrezgene";

    /** Value assigned to databaseName by the constructor. */
    static final String DATABASE_NAME = "entrezgene";
	
	/** Sent as the 'tool' URL parameter by fetch and fetchAll; set from the eutils configuration in the constructor. */
	String tool = null;
	/** Sent as the 'email' URL parameter by fetch and fetchAll; set from the eutils configuration in the constructor. */
	String email = null;
	
	/**
	 * Creates a reader for the EntrezGene database. The {@code tool} and
	 * {@code email} values are copied from the {@code eutils} section of the
	 * BioGroovy configuration.
	 */
	public EntrezGeneReader(){
		databaseName = DATABASE_NAME

		// Look the eutils section up once and take both values from it.
		def eutilsSection = BioGroovyConfig.getConfig().eutils
		tool = eutilsSection.tool
		email = eutilsSection.email
	}

	/**
	 * Retrieves an EntrezGene record and parses its contents.
	 * The stream opened on the record URL is closed when parsing finishes,
	 * whether it completed normally or threw.
	 * @param id  The ID of the EntrezGene record to be retrieved and parsed.
	 * @return the Gene parsed from the retrieved record.
	 * @throws IOException if there is a problem retrieving or parsing the file.
	 */
	public Gene fetch(String id) throws IOException{
		URL url = getUrl(id, [tool:this.tool, email:this.email])
		// withStream closes the underlying connection stream for us, even on failure
		return url.openStream().withStream { InputStream stream -> read(stream) }
	}

	/**
	 * Convenience method that populates an existing Gene object with the data
	 * retrieved from EntrezGene for {@code gene.entrezGeneId}.
	 * The request carries the same tool/email parameters as {@link #fetch(String)},
	 * and the stream opened on the URL is closed once parsing has finished.
	 * @param gene the gene to populate; its entrezGeneId selects the record to retrieve.
	 */
	public void read(Gene gene){
		// getUrl merges the supplied parameters into its own map, so it must be
		// given a real map here rather than null.
		URL url = getUrl(gene.entrezGeneId, [tool:this.tool, email:this.email])
		url.openStream().withStream { InputStream stream -> read(stream, gene) }
	}

	/**
	 * Reads a file containing a single gene record.
	 * The file's input stream is closed once parsing has finished, whether it
	 * completed normally or threw.
	 * @param file  The file to be parsed.
	 * @return the Gene parsed from the file.
	 * @throws FileNotFoundException if the file does not exist.
	 */
	public Gene read(File file){
		if (!file.exists()){
			throw new FileNotFoundException("The file was not found: ${file.getName()}" )
		}
		// withInputStream opens the file and guarantees the stream is closed afterwards
		return file.withInputStream { InputStream stream -> read(stream) }
	}

	/**
	 * Parses an input stream holding the record of a single gene into a
	 * newly created Gene.
	 * @param is The input stream
	 * @return the freshly populated Gene
	 */
	public Gene read(InputStream is){
		Gene parsed = new Gene()
		// delegate to the populating overload, then hand the result back
		read(is, parsed)
		return parsed
	}

	/**
	 * Parses an input stream containing the record for a single gene and copies
	 * the extracted data into the supplied Gene.
	 * The parser is configured the same way as in readList: non-validating,
	 * not namespace aware, and without loading the external DTD named in the
	 * document's DOCTYPE, so parsing does not depend on fetching that DTD.
	 * This method does not close the stream; that is left to the caller.
	 * @param is the input stream containing the gene data.
	 * @param gene the gene object to be populated.
	 */
	public void read(InputStream is, Gene gene){
		def factory = DocumentBuilderFactory.newInstance()
		factory.setNamespaceAware(false)
		factory.setValidating(false)
		// Skip the external DTD: it is not needed to build the DOM
		factory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false)
		factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)

		def root = factory.newDocumentBuilder().parse(is).documentElement

		parseData(root, gene, XPATH_MAP, NODE_TYPE_MAP)
		// the accession of an EntrezGene record is its gene id
		gene.accession = gene.entrezGeneId
		parseDbReferences(root, gene)
		parseArticles(root, gene)
		parseGo(root, gene)
		parsePhenotype(root, gene)
	}



	/**
	 * Reads a list of genes from an XML document that may contain several
	 * Entrezgene records.
	 * Every expression in XPATH_MAP is absolute, i.e. it is resolved from the root
	 * of the document containing the context node. To keep the records apart, each
	 * Entrezgene element is therefore copied into a document of its own before it
	 * is parsed; otherwise every gene would be filled from the first record and
	 * would collect the references, articles and phenotypes of all records.
	 * This method does not close the stream; that is left to the caller.
	 * @param inputStream  The stream of XML.
	 * @return a list of Gene objects, one per Entrezgene record, in document order
	 * @throws IOException if there is a problem reading the XML.
	 */
	public List<Gene> readList(InputStream inputStream) throws IOException{
		List<Gene> geneList = new ArrayList<Gene>();

		def factory = DocumentBuilderFactory.newInstance();

		// Non-validating parse that does not fetch the external DTD
		factory.setNamespaceAware(false);
		factory.setValidating(false);
		factory.setFeature("http://xml.org/sax/features/namespaces", false);
		factory.setFeature("http://xml.org/sax/features/validation", false);
		factory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
		factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
		factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false)

		def builder  = factory.newDocumentBuilder();
		def root     = builder.parse(inputStream).documentElement

		XPath xpath = XPathFactory.newInstance().newXPath();
		NodeList nodeList = (NodeList)xpath.evaluate(ROOT_PATH, root, XPathConstants.NODESET);
		for(int i=0; i < nodeList.getLength(); i++){
			// Root of a document holding only this record, as read(InputStream, Gene) would see it
			Node recordRoot = isolateRecord(builder, nodeList.item(i))
			Gene gene = new Gene();
			parseData(recordRoot, gene, XPATH_MAP, NODE_TYPE_MAP);
			gene.accession= gene.entrezGeneId;
			parseDbReferences(recordRoot, gene);
			parseArticles(recordRoot, gene);
			parseGo(recordRoot, gene);
			parsePhenotype(recordRoot, gene);
			geneList.add(gene);
		}

		return geneList;
	}

	/**
	 * Copies a single record element into a new document, wrapped in an element
	 * named like the record's original parent, and returns that wrapper (the new
	 * document's root element).
	 * @param builder the document builder used to create the new document.
	 * @param record  the record element to copy.
	 */
	private static Node isolateRecord(def builder, Node record) {
		def doc = builder.newDocument()
		def wrapper = doc.createElement(record.getParentNode().getNodeName())
		doc.appendChild(wrapper)
		// deep copy so the record's whole subtree moves into the new document
		wrapper.appendChild(doc.importNode(record, true))
		return wrapper
	}

	/**
	 * Fills the supplied gene from the given DOM node by running parseData
	 * followed by the reference, article, GO and phenotype parsers.
	 * NOTE(review): unlike the read/readList methods, this does not copy
	 * entrezGeneId into accession - confirm that is intended.
	 * @param gene the gene object to be populated.
	 * @param node the node the XPath expressions are evaluated against.
	 */
	@Override
	public void parse(Gene gene, Node node) {
		// fields driven by the XPath / node-type maps
		parseData(node, gene, XPATH_MAP, NODE_TYPE_MAP)

		// structures that have a dedicated parser in this class
		parseDbReferences(node, gene)
		parseArticles(node, gene)
		parseGo(node, gene)
		parsePhenotype(node, gene)
	}

	/**
	 * Collects the database cross-references of a gene. Every Dbtag element
	 * matched by the 'references' expression of XPATH_MAP is stored in
	 * gene.references as database name -> identifier.
	 * @param root	The root node of the EntrezGene record.
	 * @param gene	The Gene object being populated.
	 */
	protected void parseDbReferences(def root, Gene gene){
		def query = XPathFactory.newInstance().newXPath()
		def dbTags = query.evaluate(XPATH_MAP.references, root, XPathConstants.NODESET)

		dbTags.each { dbTag ->
			String refId = query.evaluate('Dbtag_tag/Object-id/Object-id_id', dbTag, XPathConstants.STRING)
			String dbName = query.evaluate('Dbtag_db', dbTag, XPathConstants.STRING)

			// no numeric id present: the identifier is stored as a string under a different path
			if (refId == null || refId.equals("")) {
				refId = query.evaluate('Dbtag_tag/Object-id/Object-id_str', dbTag, XPathConstants.STRING)
			}
			gene.references.put(dbName, refId)
		}
	}


	/**
	 * Parses the phenotypes associated with this gene. For every node matched by
	 * the 'phenotype' expression of XPATH_MAP a Phenotype is created with its
	 * name (Gene-commentary_heading), description (first Gene-commentary_text
	 * below the node) and one Article per PubMedId found below the node, and is
	 * added to gene.phenotypes.
	 * @param root The root node of the EntrezGene record.
	 * @param gene The Gene object being populated.
	 */
	protected void parsePhenotype(def root, Gene gene) {
		XPath xpath = XPathFactory.newInstance().newXPath();

		def nodeSet =  xpath.evaluate( XPATH_MAP.phenotype, root, XPathConstants.NODESET );
		log.debug "nodeset: " + nodeSet;
		nodeSet.each { phenotypeNode ->

			Phenotype phenotype = new Phenotype();
			phenotype.name = xpath.evaluate("Gene-commentary_heading", phenotypeNode, XPathConstants.STRING);
			phenotype.description = xpath.evaluate(".//Gene-commentary_text", phenotypeNode, XPathConstants.STRING);

			def refNodes = xpath.evaluate(".//Pub/Pub_pmid/PubMedId", phenotypeNode, XPathConstants.NODESET);
			refNodes.each{ refNode ->
				// PubMedId is an element, whose nodeValue is always null;
				// the id itself is the element's text content.
				phenotype.refs.add(new Article(pubmedId: refNode.getTextContent()));
			}

			gene.phenotypes.add(phenotype);
		}
	}

	/**
	 * Extracts the GeneRIF articles of an EntrezGene record. Each node matched
	 * by the 'articles' expression of XPATH_MAP that has a non-empty
	 * Gene-commentary_text becomes an Article (title plus PubMed id) appended
	 * to gene.articles; nodes without a title are skipped.
	 * @param root  The root node of the record.
	 * @param gene  The gene object being filled.
	 */
	public void parseArticles(def root, Gene gene){
		def query = XPathFactory.newInstance().newXPath()
		def commentaries = query.evaluate(XPATH_MAP.articles, root, XPathConstants.NODESET)

		commentaries.each { commentary ->
			String title = query.evaluate('Gene-commentary_text', commentary, XPathConstants.STRING)
			if (!title) {
				// null or empty title: nothing to record for this commentary
				return
			}
			Article article = new Article()
			article.title = title
			article.pubmedId = query.evaluate('.//PubMedId', commentary, XPathConstants.STRING)
			gene.articles.add(article)
		}
	}



	/**
	 * Parses the GO entries of an EntrezGene record. Every node matched by the
	 * 'goList' expression of XPATH_MAP is turned into a GeneOntology and added
	 * to the gene's function, process or component list according to its type;
	 * entries of any other type are not added to a list.
	 * The evidence code is taken from the text after the first ':' of
	 * Other-source_post-text. When that text has no ':' or the code is not a
	 * known GeneOntologyEvidence constant, the evidence is left unset and the
	 * problem is logged instead of aborting the parse of the whole gene.
	 * @param root	The root node of the EntrezGene record.
	 * @param gene	The gene object being filled.
	 */
	public void parseGo(def root, Gene gene){
		// one XPath instance is enough; creating a factory per expression is wasted work
		def xpath = XPathFactory.newInstance().newXPath();

		def nodeSet = xpath.evaluate (XPATH_MAP.goList,  root, XPathConstants.NODESET);
		nodeSet.each{ goNode ->

			GeneOntology go = new GeneOntology();
			go.goId = xpath.evaluate('.//Object-id_id', goNode, XPathConstants.NUMBER);
			go.name = xpath.evaluate ('.//Other-source_anchor', goNode, XPathConstants.STRING);

			String tempGoType = xpath.evaluate('./Gene-commentary_label', goNode, XPathConstants.STRING);
			go.type = GeneOntologyType.identifyType(tempGoType);
			if (go.type == GeneOntologyType.FUNCTION){
				gene.goFunctionList.add(go)
			}else if (go.type == GeneOntologyType.PROCESS){
				gene.goProcessList.add(go);
			}else if (go.type == GeneOntologyType.COMPONENT){
				gene.goComponentList.add(go)
			}

			String tempEv = xpath.evaluate('.//Other-source_post-text', goNode, XPathConstants.STRING)
			String[] tempEvArray =  tempEv.split(":");
			if (tempEvArray.length > 1){
				String evidenceCode = tempEvArray[1].trim();
				try {
					go.evidence = GeneOntologyEvidence.valueOf(evidenceCode);
				} catch (IllegalArgumentException e) {
					log.warn("Unknown GO evidence code '{}' for GO entry '{}'", evidenceCode, go.name);
				}
			} else {
				log.debug("No GO evidence code found in '{}' for GO entry '{}'", tempEv, go.name);
			}

			def arts = xpath.evaluate(".//PubMedId", goNode, XPathConstants.NODESET);
			arts.each{Node art ->
				go.pmidList.add(art.getTextContent());
			}
		}
	}


	/**
	 * Parses the EntrezGene file at the given path and returns the Gene built
	 * from its content.
	 * @param file  A valid file path
	 * @return the Gene parsed from the file.
	 * @throws IOException if the file cannot be found, or if there is a problem reading the file.
	 */
	public Gene readFile(String file) throws IOException{
		File fileObj = new File(file)

		// happy path first: an existing file goes straight to the File-based reader
		if (fileObj.exists()){
			return read(fileObj)
		}
		throw new IOException("File does not exist: ${fileObj.absolutePath}")
	}


	/**
	 * Retrieves the record(s) for the given id and parses every Entrezgene
	 * record found in the response. The stream opened on the URL is closed once
	 * parsing has finished, whether it completed normally or threw.
	 * @param id the id passed on to getUrl.
	 * @return the list of genes produced by readList.
	 * @throws IOException if there is a problem retrieving or reading the XML.
	 */
	@Override
	public List<Gene> fetchAll(String id) throws IOException {
		URL url = getUrl(id, [tool:this.tool, email:this.email]);
		// readList leaves the stream open, so close it here
		return url.openStream().withStream { InputStream stream -> readList(stream) }
	}

	/**
	 * Builds the EFetch URL for the given id. The request always carries the
	 * gene database, the id and retmode 'xml'; entries of {@code params} are
	 * merged on top and override these defaults when they use the same key.
	 * @param id     the id of the record to retrieve.
	 * @param params additional URL parameters; may be null or empty.
	 * @return the URL built by EUtilsURLFactory for these parameters.
	 */
	@Override
	public URL getUrl(String id, Map<String, String> params) {
		Map<String, String> map = [db:EUtilsURLFactory.DB_GENE,id:id,retmode:'xml' ]
		// callers may pass no extra parameters at all
		if (params) {
			map.putAll(params)
		}

		String url = EUtilsURLFactory.getURL(EUtilsURLFactory.EFETCH, map);
		return new URL(url)
	}

	/**
	 * Creates a new, independently configured EntrezGeneReader.
	 * @return a new reader instance.
	 */
	@Override
	public IFetcher<Gene> getNewInstance() {
		IFetcher<Gene> freshReader = new EntrezGeneReader()
		return freshReader
	}



}

